The data is simulated via the following code. Histogram of each variable is shown below. Each of x2 : x7 has its own data structure while x1, x8:x10 are randomly generated normal variables.
set.seed(1234)
x1 <- rnorm(1000, 0, 1)
x2 <- sample(c(rnorm(500, -3, 1), rnorm(500, 3, 1)), size = 1000)
x3 <- sample(c(rep(-1, 500), rep(1, 500)), size = 1000)
x4 <- sample(c(rnorm(250, -3, 1), rnorm(750, 3, 1)), size = 1000)
x5 <- sample(c(rnorm(330, -5, 1), rnorm(340, 0, 1), rnorm(330, 5, 1)), size = 1000)
x6 <- sample(c(rnorm(450, -5, 1), rnorm(100, 0, 1), rnorm(450, 5, 1)), size = 1000)
x7 <- sample(c(rnorm(500, -5, 1), rnorm(500, 5, 1)), size = 1000)
x8 <- rnorm(1000, 0, 1)
x9 <- rnorm(1000, 0, 1)
x10 <- rnorm(1000, 0, 1)
data_mult <- tibble::tibble(x1 = x1, x2 = x2, x3 = x3,
x4 = x4, x5 = x5, x6 = x6, x7 = x7,
x8 = x8, x9 = x9, x10 = x10) %>% map_df(scale)
origin_dt_bi <- data_mult %>%
dplyr::select(-x3) %>%
gather(names, values) %>%
mutate(names = as_factor(names))
origin_dt_bi %>%
ggplot(aes(x = values)) +
geom_histogram(binwidth = 0.3) +
geom_density(aes(y = 0.3 * ..count..)) +
facet_wrap(vars(names), ncol = 3)
Searching method Search_geodesic() is performed on x1, x2, x8:x10 with an additional variable from x3: x7. The following few screenshots are taken at the end of each trial and projection pursuit are able to find the data structure in all the caese.
run_geodesic <- function(var){
set.seed(12345)
mult_geodesic <- data_mult[,c(1,2,var,8,9,10)] %>%
animate_xy(tour_path = guided_tour(holes(), d = 2,
search_f = search_geodesic_latest),
rescale = FALSE)
return(mult_geodesic)
}
Searching method Search_geodesic() is performed on x1, x8, x9, x10 with three additional variables choosing from x2, x3, x4, x5, x6, x7. Notice that the pattern in x3 is very different from other varaibles, it is always found in the tour, while less distinct informatiove variables are less likely to be found. We could allow for sequential step so that the variables found could be put aside in the next tour. i.e. with x1: x4, x8:x10, x3 is found first, then run the tour again with all the other variables other than x3. See the following examples:
x2, x3, x4: x3 is recognised while x2 and x4 are mixedx2, x5, x6: without x3, x5 and x6 are more distinct and thus foundx3, x5, x6: when x3 is added back, the tour can’t find x5, x6 againSimilar to 1-tour_optim_documentation.html, PCA plot and index_val tracing plot are provided. With two columns in each projection basis, PCA is done separately for each columns. The following demo uses variable x1, x2, x7, x8, x9, x10.
proj <- mult_geodesic %>%
filter(info == "interpolation") %>% pull(basis) %>% tail(1)
proj[[1]]
## [,1] [,2]
## [1,] -0.001738144 0.03945460
## [2,] 0.496024430 -0.86384416
## [3,] 0.862833666 0.49212418
## [4,] -0.028429470 -0.05467946
## [5,] 0.077831028 0.06522615
## [6,] -0.051077434 0.05278356
projected <- as.matrix(data_mult[,c(1,2,7,8,9,10)]) %*% proj[[1]] %>% as_tibble()
projected %>%
ggplot(aes(x = V1, y = V2)) +
geom_point() +
ggtitle("without rounding")
# colour by parts of optimisation (static)
mult_releveled <- pca_v1 %>%
filter(info != "interpolation")
p1 <- pca_v1 %>%
ggplot(aes(x = PC1, y = PC2, col = info)) +
geom_point() +
geom_point(data = mult_releveled) +
theme(aspect.ratio = 1) +
ggtitle("PCA plot for v1")
# colour by parts of optimisation (static)
mult_releveled <- pca_v2 %>%
filter(info != "interpolation")
p2 <- pca_v2 %>%
ggplot(aes(x = PC1, y = PC2, col = info)) +
geom_point() +
geom_point(data = mult_releveled) +
theme(aspect.ratio = 1) +
ggtitle("PCA plot for v2")
p1 + p2 + plot_layout(guides = "collect")
mult_animate_v1 <- pca_v1 %>%
mutate(loop = ifelse(info == "start", 1, loop)) %>%
group_by(tries, loop, info3) %>%
mutate(animate_id = group_indices())
mult_animate_v2 <- pca_v2 %>%
mutate(loop = ifelse(info == "start", 1, loop)) %>%
group_by(tries, loop, info3) %>%
mutate(animate_id = group_indices())
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
mult_path_v1 <- mult_animate_v1 %>%
ungroup() %>%
ggplot(aes(x = animate_id, y = index_val, col = info)) +
geom_point() +
theme(legend.position = "none")
mult_path_v2 <- mult_animate_v2 %>%
ungroup() %>%
ggplot(aes(x = animate_id, y = index_val, col = info)) +
geom_point()
last_tries_v1 <- mult_animate_v1$tries[which.max(mult_animate_v1$tries)]
last_tries_v2 <- mult_animate_v2$tries[which.max(mult_animate_v2$tries)]
direction_v1 <- mult_animate_v1 %>%
filter(tries == last_tries_v1, info %in% c("direction_search", "best_direction_search")) %>%
ggplot(aes(x = animate_id, y = index_val, col = info)) +
geom_point() +
xlim(1, max(mult_animate_v1$animate_id)) +
scale_color_manual(values = c(gg_color_hue(6)[2:3], gg_color_hue(6)[5:6])) +
theme(legend.position = "none")
direction_v2 <- mult_animate_v2 %>%
filter(tries == last_tries_v2, info %in% c("direction_search", "best_direction_search")) %>%
ggplot(aes(x = animate_id, y = index_val, col = info)) +
geom_point() +
xlim(1, max(mult_animate_v2$animate_id)) +
scale_color_manual(values = c(gg_color_hue(6)[2:3], gg_color_hue(6)[5:6])) +
theme(legend.position = "none")
design <- "
13
24
24
"
direction_v1 + mult_path_v1 + direction_v2 + mult_path_v2 +
plot_layout(design = design, guides = "collect")
geo_animate <- mult_animate_v1 %>%
ggplot(aes(x = PC1, y = PC2, col = info)) +
geom_point() +
theme(aspect.ratio = 1,
legend.position = "none") +
gganimate::transition_time(animate_id) +
gganimate::shadow_mark()
p1 <- gganimate::animate(geo_animate, fps = 2)
index_val_animate <- mult_animate_v1 %>%
ggplot(aes(x = animate_id, y = index_val, col = info)) +
geom_jitter(width = 0.0005) +
theme(aspect.ratio = 1) +
gganimate::transition_time(animate_id) +
gganimate::shadow_mark()
p2 <- gganimate::animate(index_val_animate, fps = 2)
a_mgif <- image_read(p1)
b_mgif <- image_read(p2)
new_gif <- image_append(c(a_mgif[1], b_mgif[1]))
for (i in 2:100) {
combined <- image_append(c(a_mgif[i], b_mgif[i]))
new_gif <- c(new_gif, combined)
}
new_gif
geo_animate <- mult_animate_v2 %>%
ggplot(aes(x = PC1, y = PC2, col = info)) +
geom_point() +
theme(aspect.ratio = 1,
legend.position = "none") +
gganimate::transition_time(animate_id) +
gganimate::shadow_mark()
p1 <- gganimate::animate(geo_animate, fps = 2)
index_val_animate <- mult_animate_v2 %>%
ggplot(aes(x = animate_id, y = index_val, col = info)) +
geom_jitter(width = 0.0005) +
theme(aspect.ratio = 1) +
gganimate::transition_time(animate_id) +
gganimate::shadow_mark()
p2 <- gganimate::animate(index_val_animate, fps = 2)
a_mgif <- image_read(p1)
b_mgif <- image_read(p2)
new_gif <- image_append(c(a_mgif[1], b_mgif[1]))
for (i in 2:100) {
combined <- image_append(c(a_mgif[i], b_mgif[i]))
new_gif <- c(new_gif, combined)
}
new_gif